#sessionInfo()
#packageVersion("Seurat")
#packageVersion("spacexr")
#packageVersion("clusterProfiler")
#packageVersion("SpatialPCA")
#packageVersion("igraph")
#packageVersion("harmony")
#packageVersion("ggplot2")
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(stringr)
library(ggrepel)
library(RColorBrewer)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggsignif)
library(readr)
library(spacexr)
library(scatterpie)
library(pheatmap)
library(SpatialPCA)
library(glmGamPoi)
library(harmony)
library(presto)
library(igraph)
library(plotly)
path = "/beegfs/scratch/ric.hsr/Gaia_saldarini/FINAL/"
samples_tot = c("A1", "B1", "A3", "C1", "D1", "C2", "D2", "B3", "C3", "D3")
reds = brewer.pal(7, "Reds")
blues = brewer.pal(3, "Blues")
cols_samples_tot = c(blues, reds)
names(cols_samples_tot) = samples_tot
# le old allineate sul nuovo genoma (con le 67 probes) sono in /beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lorè/LORE_probe/old_slice
# maschere già messe
slice_1_ctrl = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/old_slice/A11SPA/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "A1",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
slice_2_ctrl = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/old_slice/V12N16307B1_noair/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "B1",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
slice_3_dual = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/old_slice/V12N16307C1_noair/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "C1",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
slice_4_dual = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/old_slice/D16SPA/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "D1",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
slice_5_dual = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/old_slice/A16SPA/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "C2",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
slice_6_dual = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/old_slice/C16SPAL/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "D2",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
# nuove
AB = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/AB/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "A3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
AS = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/AS/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "A3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
B = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/B1/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "B3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
CB = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/CB/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "C3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
CS = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/CS/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "C3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
DB = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/DB/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "D3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
DS = Load10X_Spatial(
data.dir = "/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/DS/outs",
filename = "raw_feature_bc_matrix.h5",
assay = "Spatial",
slice = "D3",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL
)
# B = beads
# S = surnatant
# 2 different sequencings
unify_B_S = function(a, b){
# controllo che stessi pixel, altrimenti prendo quelli in comune
common_pixels = intersect(colnames(a), colnames(b))
common_genes = intersect(rownames(a), rownames(b))
a = a[common_genes, common_pixels]
b = b[common_genes, common_pixels]
a@assays$Spatial$counts = a@assays$Spatial$counts + b@assays$Spatial$counts
a$nCount_Spatial = colSums(a@assays$Spatial$counts)
a$nFeature_Spatial = colSums(a@assays$Spatial$counts > 0)
return (a)
}
slide_A = unify_B_S(AB, AS)
slide_B = B
slide_C = unify_B_S(CB, CS)
slide_D = unify_B_S(DB, DS)
# prendere solo pixels dove c'è tessuto e trascrittomica
slice_1_ctrl = slice_1_ctrl[, rownames(GetTissueCoordinates(slice_1_ctrl))]
slice_2_ctrl = slice_2_ctrl[, rownames(GetTissueCoordinates(slice_2_ctrl))]
slice_3_dual = slice_3_dual[, rownames(GetTissueCoordinates(slice_3_dual))]
slice_4_dual = slice_4_dual[, rownames(GetTissueCoordinates(slice_4_dual))]
slice_5_dual = slice_5_dual[, rownames(GetTissueCoordinates(slice_5_dual))]
slice_6_dual = slice_6_dual[, rownames(GetTissueCoordinates(slice_6_dual))]
slide_A = slide_A[, rownames(GetTissueCoordinates(slide_A))]
slide_B = slide_B[, rownames(GetTissueCoordinates(slide_B))]
slide_C = slide_C[, rownames(GetTissueCoordinates(slide_C))]
slide_D = slide_D[, rownames(GetTissueCoordinates(slide_D))]
# MASCHERE (i vecchi le hanno già )
# da terminale nella cartella dove ci sono le cartelle di maschere: /beegfs/scratch/ric.cirillo.transcriptomics/saldarini.gaia/A1-1SPA_30724/
#for i in *.json; do cat ${i} |sed 's/},{/\n/g' | grep "tissue" | cut -f3,4 -d ',' | sed 's/"row"://g;s/,.*:/\t/g' | awk '{print $2+1, $1+1}' > ${i}.tab; done
# questo crea nella cartella dove c'è il json un file json.tab con riga e colonna dei pixel validi. confronto con space ranger coordinates e tengo solo quelle valide con filtro
read_delim('/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/analysis/visium-v2_coordinates.txt',delim='\t',col_names = c('barcode','col','row'))->barcodes
read_delim("/beegfs/scratch/ric.cirillo.transcriptomics/saldarini.gaia/airways_masks_mouse_new/V13F06-008-A1_noairways.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes = as.list(selected_barcodes)
selected_barcodes = lapply(selected_barcodes, function(x) paste0(x, "-1"))
selected_barcodes = unlist(selected_barcodes)
dim(slide_A)[2]
## [1] 2813
length(selected_barcodes)
## [1] 2535
slide_A <- slide_A[,selected_barcodes]
read_delim("/beegfs/scratch/ric.cirillo.transcriptomics/saldarini.gaia/airways_masks_mouse_new/V13F06-008-B1_noairways.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes = as.list(selected_barcodes)
selected_barcodes = lapply(selected_barcodes, function(x) paste0(x, "-1"))
selected_barcodes = unlist(selected_barcodes)
dim(slide_B)[2]
## [1] 3510
length(selected_barcodes)
## [1] 3404
slide_B <- slide_B[,selected_barcodes]
read_delim("/beegfs/scratch/ric.cirillo.transcriptomics/saldarini.gaia/airways_masks_mouse_new/V13F06-008-C1_noairways.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes = as.list(selected_barcodes)
selected_barcodes = lapply(selected_barcodes, function(x) paste0(x, "-1"))
selected_barcodes = unlist(selected_barcodes)
dim(slide_C)[2]
## [1] 3710
length(selected_barcodes)
## [1] 3597
slide_C <- slide_C[,selected_barcodes]
read_delim("/beegfs/scratch/ric.cirillo.transcriptomics/saldarini.gaia/airways_masks_mouse_new/V13F06-008-D1_noairways.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes = as.list(selected_barcodes)
selected_barcodes = lapply(selected_barcodes, function(x) paste0(x, "-1"))
selected_barcodes = unlist(selected_barcodes)
dim(slide_D)[2]
## [1] 3501
length(selected_barcodes)
## [1] 3408
slide_D <- slide_D[,selected_barcodes]
dim(GetTissueCoordinates(slice_3_dual))
## [1] 3225 2
dim(slice_3_dual@images$C1@coordinates)
## [1] 3225 5
# tutte le coordinate sono già filtrate
colnames(slice_1_ctrl) = paste0(colnames(slice_1_ctrl), "_", "A1")
colnames(slice_2_ctrl) = paste0(colnames(slice_2_ctrl), "_", "B1")
colnames(slice_3_dual) = paste0(colnames(slice_3_dual), "_", "C1")
colnames(slice_4_dual) = paste0(colnames(slice_4_dual), "_", "D1")
colnames(slice_5_dual) = paste0(colnames(slice_5_dual), "_", "C2")
colnames(slice_6_dual) = paste0(colnames(slice_6_dual), "_", "D2")
colnames(slide_A) = paste0(colnames(slide_A), "_", "A3")
colnames(slide_B) = paste0(colnames(slide_B), "_", "B3")
colnames(slide_C) = paste0(colnames(slide_C), "_", "C3")
colnames(slide_D) = paste0(colnames(slide_D), "_", "D3")
slices_list = list("A1" = slice_1_ctrl, "B1" = slice_2_ctrl, "A3" = slide_A, "C1" = slice_3_dual, "D1" = slice_4_dual, "C2" = slice_5_dual, "D2" = slice_6_dual, "B3" = slide_B, "C3" = slide_C, "D3" = slide_D)
for (i in 1:10){
slices_list[[i]]$orig.ident = names(slices_list)[i]
}
slices_merged = merge(slices_list[[1]], y = c(slices_list[[2]], slices_list[[3]], slices_list[[4]], slices_list[[5]], slices_list[[6]], slices_list[[7]], slices_list[[8]], slices_list[[9]], slices_list[[10]]))
slices_merged$orig.ident = substr(colnames(slices_merged), nchar(colnames(slices_merged))-1, nchar(colnames(slices_merged)))
slices_merged$orig.ident = factor(slices_merged$orig.ident, levels = names(cols_samples_tot))
Idents(slices_merged) = slices_merged$orig.ident
table(slices_merged$orig.ident)
##
## A1 B1 A3 C1 D1 C2 D2 B3 C3 D3
## 2452 2727 2532 3225 2896 3349 3413 3398 3585 3399
slices_merged$batch = as.factor(substr(colnames(slices_merged), nchar(colnames(slices_merged)), nchar(colnames(slices_merged)))) # ultime lettere delle colnames
table(slices_merged$batch)
##
## 1 2 3
## 11300 6762 12914
n_slices = length(slices_list)
for (i in 1:n_slices){
slices_list[[i]]$batch = as.factor(substr(colnames(slices_list[[i]]), nchar(colnames(slices_list[[i]])), nchar(colnames(slices_list[[i]]))))
}
SpatialDimPlot(slices_merged, cols = cols_samples_tot, ncol = 5) & NoLegend()
ggsave(paste0(path, "quality/q00.png"), width = 10, height = 5)
saveRDS(slices_merged, paste0(path, "data/slices_merged_unfiltered.rds"))
saveRDS(slices_list, paste0(path, "data/slices_list_unfiltered.rds"))
Applicare sia su list, sia su merged
MABS_genes = read.csv("/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe2/MABS_genes", sep = " ", header = FALSE)
MABS_genes = MABS_genes$V2
MABS_genes = unlist(lapply(MABS_genes, function(x) {gsub("_", "-", x)}))
MABS_genes = c(unique(MABS_genes), "MAB-3731c", "MAB-4532c")
MABS_genes = setdiff(MABS_genes, "rrs")
write.csv(MABS_genes, paste0(path, "/data/MABS_genes.csv"), row.names = FALSE)
# ricordarsi di aggiungere rrs se servisse
length(MABS_genes)
## [1] 66
n_slices = 10
for (i in 1:n_slices){
# filtro su geni dell'host
genes_filtered = rownames(slices_list[[i]])[rowSums(slices_list[[i]]@assays$Spatial$counts)>2] # filtro genes: detected in at least 3 cells
# aggiungo MABS, perchè c'è il rischio di averli persi con il filtro
genes_filtered = unique(c(genes_filtered, MABS_genes))
slices_list[[i]] =slices_list[[i]][genes_filtered,]
# no filtro su spots a priori
}
gene_list <- lapply(slices_list, function(x) rownames(x))
total_genes <- Reduce(intersect, gene_list)
total_genes = unique(total_genes)
length(total_genes)
## [1] 20631
slices_merged = slices_merged[total_genes, ]
## Warning: Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
## Not validating Seurat objects
host_genes = setdiff(total_genes, MABS_genes)
host_genes[startsWith(host_genes, "MAB-")]
## character(0)
length(host_genes)
## [1] 20565
write.csv(host_genes, paste0(path, "/data/host_genes.csv"), row.names = FALSE)
mt_genes = c("Mtif2", "Mto1", "Mterf2", "Mtres1", "Mtg1", "Mtg2", "Mtch1", "Mtrf1l", "Mtfmt", "Mtrf1", "Mtfp1", "Mtpap", "Mterf4", "Mtif3", "Mtfr1", "Mterf3", "Mtarc1", "Mterf1b", "Mtx3", "Mterf1a", "Mtfr2")
mt_genes = total_genes[na.omit(match(mt_genes, total_genes))]
write.csv(mt_genes, paste0(path, "/data/mt_genes.csv"), row.names = FALSE)
MABS_genes = read.csv(paste0(path, "/data/MABS_genes.csv"))[[1]]
host_genes = read.csv(paste0(path, "/data/host_genes.csv"))[[1]]
mt_genes = read.csv(paste0(path, "/data/mt_genes.csv"))[[1]]
# before filtering
num_spots = sapply(slices_list, ncol)
n_slices = length(slices_list)
# nFeatures
# nCounts
# % mitochondrial
# scatterplot nCounts % mitochondrial
for (i in 1:n_slices){
slices_list[[i]][["percent.mt"]] <- PercentageFeatureSet(slices_list[[i]], features = mt_genes)
}
slices_merged[["percent.mt"]] <- PercentageFeatureSet(slices_merged, features = mt_genes)
VlnPlot(slices_merged, features = c("nFeature_Spatial", "nCount_Spatial", "percent.mt"), pt.size=0, add.noise = TRUE, split.by = "orig.ident", cols = cols_samples_tot, ncol = 1) & theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) & geom_hline(yintercept = 0.2, color = "red")
ggsave(paste0(path, "quality/q0.png"), width = 4, height = 9)
SpatialFeaturePlot(slices_merged, features = "nCount_Spatial", ncol = 5, min.cutoff = 2000, max.cutoff = 70000) + plot_layout(guides = "collect") &
guides(fill = guide_colorbar(direction = "vertical"))
ggsave(paste0(path, "quality/q1.png"), width = 10, height = 6)
SpatialFeaturePlot(slices_merged, features = "nFeature_Spatial", ncol = 5, min.cutoff = 2000, max.cutoff = 11000) + plot_layout(guides = "collect" ) &
guides(fill = guide_colorbar(direction = "vertical"))
ggsave(paste0(path, "quality/q2.png"), width = 10, height = 6)
SpatialFeaturePlot(slices_merged, features = "percent.mt", ncol = 5, min.cutoff = 0, max.cutoff = 0.4) + plot_layout(guides = "collect") &
guides(fill = guide_colorbar(direction = "vertical"))
ggsave(paste0(path, "quality/q3.png"), width = 10, height = 6)
FeatureScatter(slices_merged, feature1 = "nCount_Spatial", feature2 = "percent.mt", cols = cols_samples_tot) + geom_hline(yintercept = 0.2, color = "red")
ggsave(paste0(path, "quality/q4.png"))#, width = 10, height = 10)
FeatureScatter(slices_merged, feature1 = "nCount_Spatial", feature2 = "nFeature_Spatial", cols = cols_samples_tot)
ggsave(paste0(path, "quality/q5.png"))#, width = 10, height = 10)
for (i in 1:n_slices){
#print(dim(slices_list[[i]]))
slices_list[[i]] = subset(slices_list[[i]], subset = percent.mt < 0.2) # filter cells with percent mt >=0.2: threshold 0.2 da scatterplot
#print(dim(slices_list[[i]]))
}
slices_merged = subset(slices_merged, subset = percent.mt < 0.2)
D2
png(paste0(path, "quality/q6.png"))
hist(slices_list[["D2"]]$nCount_Spatial, breaks = 70, main = "D2 counts per spot distribution")
abline(v = 10000, col = "red", lwd = 2)
text(x = 15000, y = 300, labels = "10k", col = "red", pos = 3)
dev.off()
## png
## 2
table(slices_list[["D2"]]$nCount_Spatial > 10000)
##
## FALSE TRUE
## 770 2642
slices_list[["D2"]]$quality = "good_quality"
slices_list[["D2"]]$quality[slices_list[["D2"]]$nCount_Spatial < 10000] = "low_quality"
col_quality = c("green3", "red3")
names(col_quality) = c("good_quality", "low_quality")
SpatialDimPlot(slices_list[["D2"]], group.by = "quality", cols = col_quality, alpha = 0.5)
ggsave(paste0(path, "quality/q7.png"))
SpatialDimPlot(slices_list[["D2"]][, slices_list[["D2"]]$nCount_Spatial > 10000]) & NoLegend()
## Warning: Not validating Seurat objects
ggsave(paste0(path, "quality/quality_D2_filter.png"))
D2_low_quality = colnames(slices_list[["D2"]])[slices_list[["D2"]]$nCount_Spatial < 10000]
#length(D2_low_quality)
write.csv(D2_low_quality, paste0(path, "data/D2_low_quality.csv"), row.names = FALSE)
slices_list[["D2"]] = slices_list[["D2"]][,! (colnames(slices_list[["D2"]]) %in% D2_low_quality)]
slices_merged = slices_merged[,! (colnames(slices_merged) %in% D2_low_quality)]
perc_removed = NULL
for (i in 1:n_slices){
perc_removed[i] = (num_spots[i] - ncol(slices_list[[i]]))/num_spots[i]
}
perc_removed
## [1] 0.0040783034 0.0014668133 0.0007898894 0.0009302326 0.0013812155
## [6] 0.0000000000 0.2259009669 0.0005885815 0.0005578801 0.0005884084
read_delim('/beegfs/scratch/ric.cirillo.transcriptomics/ric.cirillo.transcriptomics/Progetti_Lore/LORE_probe/analysis/visium-v2_coordinates.txt',delim='\t',col_names = c('barcode','col','row'))->barcodes
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V12N16-307-C1_.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_C1 = paste0(selected_barcodes, "-1_C1")
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V12N16-307-D1_.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_D1 = paste0(selected_barcodes, "-1_D1")
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V13J17-404_A1airwaysinflamed.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_C2 = paste0(selected_barcodes, "-1_C2")
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V13J17-404_C1airwaysinflamed.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_D2 = paste0(selected_barcodes, "-1_D2")
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V13F06-008_B1airwaysinflamed.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_B3 = paste0(selected_barcodes, "-1_B3")
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V13F06-008_C1airwaysinflamed.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_C3 = paste0(selected_barcodes, "-1_C3")
read_delim("/beegfs/scratch/ric.hsr/Gaia_saldarini/new_slices_10/beads/V13F06-008_D1airwaysinflamed.json.tab",delim=' ',col_names = c('col','row')) %>%
left_join(barcodes) %>%
pull(barcode)->selected_barcodes
selected_barcodes_D3 = paste0(selected_barcodes, "-1_D3")
selected_barcodes = c(selected_barcodes_C1, selected_barcodes_D1, selected_barcodes_C2, selected_barcodes_D2, selected_barcodes_B3, selected_barcodes_C3, selected_barcodes_D3)
slices_merged$beads = "Tissue"
slices_merged$beads[selected_barcodes] = "Airways_containing_beads"
cols_beads = c("green", "darkblue")
names(cols_beads) = c("Tissue", "Airways_containing_beads")
samples_infected = c("C1", "D1", "C2", "D2", "B3", "C3", "D3")
SpatialDimPlot(slices_merged, group.by = "beads", ncol = 5, alpha = 0.35, cols = cols_beads) + plot_layout(guides = "collect")
ggsave(paste0(path, "quality/beads_7.png"), height = 6, width = 10)
sel_barcodes_list = list(selected_barcodes_C1, selected_barcodes_D1, selected_barcodes_C2, selected_barcodes_D2, selected_barcodes_B3, selected_barcodes_C3, selected_barcodes_D3)
#length(sel_barcodes_list)
samples_infected = c("C1", "D1", "C2", "D2", "B3", "C3", "D3")
for (i in 1:7){
slices_list[[samples_infected[[i]]]]$beads = "Tissue"
slices_list[[samples_infected[[i]]]]$beads[sel_barcodes_list[[i]]] = "Airways_containing_beads"
}
write.csv(selected_barcodes, paste0(path, "data/beads.csv"), row.names = FALSE)
for (i in 1:length(slices_list)){
slices_list[[i]]$condition = as.factor(ifelse(slices_list[[i]]$orig.ident %in% samples_infected, "Infected", "Ctrl"))
}
slices_merged$condition = as.factor(ifelse(slices_merged$orig.ident %in% samples_infected, "Infected", "Ctrl"))
MABS_genes = read.csv(paste0(path, "/data/MABS_genes.csv"))[[1]]
slices_merged = JoinLayers(slices_merged)
slices_merged$rpoB_counts = slices_merged[["Spatial"]]$counts["rpoB", ]
slices_merged$lsr2_counts = slices_merged[["Spatial"]]$counts["MAB-0545", ]
slices_merged$MABS_presence = ifelse((slices_merged$rpoB_counts + slices_merged$lsr2_counts) > 0,
"MABS", "no_mabs")
slices_merged$MABS_type = ifelse(slices_merged$MABS_presence == "no_mabs", "no_mabs",
ifelse(slices_merged$lsr2_counts > 0, "lsr2+", "lsr2-"))
slices_merged$MABS_tot = ifelse(colSums(slices_merged[["Spatial"]]$counts[MABS_genes, ]) > 0, "MABS", "no_mabs")
for (i in 4:length(slices_list)){
slices_list[[i]]$rpoB_counts = slices_list[[i]][["Spatial"]]$counts["rpoB", ]
slices_list[[i]]$lsr2_counts = slices_list[[i]][["Spatial"]]$counts["MAB-0545", ]
slices_list[[i]]$MABS_presence = ifelse((slices_list[[i]]$rpoB_counts + slices_list[[i]]$lsr2_counts) > 0,
"MABS", "no_mabs")
slices_list[[i]]$MABS_type = ifelse(slices_list[[i]]$MABS_presence == "no_mabs", "no_mabs",
ifelse(slices_list[[i]]$lsr2_counts > 0, "lsr2+", "lsr2-"))
slices_list[[i]]$MABS_tot = ifelse(colSums(slices_list[[i]][["Spatial"]]$counts[MABS_genes, ]) > 0, "MABS", "no_mabs")
}
samples_infected = c("C1", "D1", "C2", "D2", "B3", "C3", "D3")
cols_samples = rep("red2", 7) #brewer.pal(7, "Reds")
names(cols_samples) = samples_infected
cols_geni_batterici = c("#D95F02","#1B9E77", "lavenderblush1")
names(cols_geni_batterici) = c("lsr2+", "lsr2-", "no_mabs")
cols_mabs = c("firebrick3", "lavenderblush1")
names(cols_mabs) = c("MABS", "no_mabs")
cols_rpoB_lsr2 = c("#D95F02","#1B9E77")
names(cols_rpoB_lsr2) = c("lsr2", "rpoB")
infected = slices_merged[, slices_merged$condition == "Infected"]
SpatialDimPlot(infected, group.by = "MABS_presence", ncol = 2, cols = cols_mabs) + plot_layout(guides = "collect")
ggsave(paste0(path, "img/MABS_1.png"), height=10, width=10)
SpatialDimPlot(infected, group.by = "MABS_type", ncol = 2, cols = cols_geni_batterici) + plot_layout(guides = "collect")
ggsave(paste0(path, "img/MABS_2.png"), height=10, width=10)
SpatialDimPlot(infected, group.by = "MABS_tot", ncol = 2, cols = cols_mabs) + plot_layout(guides = "collect")
df_mabs = data.frame()
df_mabs_ratio = data.frame()
for (i in 1: length(samples_infected)){
xx = infected[, infected$orig.ident == samples_infected[i]]
r = data.frame("number_spots" = sum(xx$rpoB_counts > 0), "gene" = "rpoB", "sample" = samples_infected[i], "counts" = sum(xx$rpoB_counts), "number_spots_lsr2" = sum(xx$MABS_type == "lsr2-"), "perc_spots" = sum(xx$rpoB_counts > 0)/length(xx$rpoB_counts))
l = data.frame("number_spots" = sum(xx$lsr2_counts > 0), "gene" = "lsr2", "sample" = samples_infected[i], "counts" = sum(xx$lsr2_counts), "number_spots_lsr2" = sum(xx$MABS_type == "lsr2+"), "perc_spots" = sum(xx$lsr2_counts > 0)/length(xx$lsr2_counts))
df_mabs = rbind(df_mabs, rbind(r, l))
ratio_rpoB_lsr2 = data.frame("spots_ratio" = r$number_spots/l$number_spots, "sample" = samples_infected[i], "counts_ratio" = r$counts/l$counts, "spots_ratio_lsr2" = r$number_spots_lsr2/l$number_spots_lsr2)
df_mabs_ratio = rbind(df_mabs_ratio, ratio_rpoB_lsr2)
}
df_mabs$gene = factor(df_mabs$gene, levels = names(cols_rpoB_lsr2))
df_mabs$sample = factor(df_mabs$sample, levels = samples_infected)
df_mabs_ratio$sample = factor(df_mabs_ratio$sample, levels = samples_infected)
ggplot(data=df_mabs, aes(x = sample, y=number_spots, fill = gene)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("# of spots") +
theme(panel.grid.minor = element_blank()) +
ggtitle("detection of bacterial transcripts")+
scale_fill_manual(values=cols_rpoB_lsr2)
ggsave(paste0(path, "img/MABS_3.png"))#, height=10, width=10)
## Saving 7 x 5 in image
ggplot(data=df_mabs, aes(x=sample, y=perc_spots, fill = gene)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("% of spots per slice") +
theme(panel.grid.minor = element_blank()) +
ggtitle("detection of bacterial transcripts")+
scale_fill_manual(values=cols_rpoB_lsr2)
ggsave(paste0(path, "img/MABS_4.png"))
## Saving 7 x 5 in image
ggplot(data=df_mabs, aes(x = sample, y=counts, fill = gene)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("# of counts") +
theme(panel.grid.minor = element_blank()) +
ggtitle("detection of bacterial transcripts")+
scale_fill_manual(values=cols_rpoB_lsr2)
ggsave(paste0(path, "img/MABS_6.png"))#, height=10, width=10)
## Saving 7 x 5 in image
ggplot(data=df_mabs, aes(x = sample, y=number_spots_lsr2, fill = gene)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("# of spots lsr2+-") +
theme(panel.grid.minor = element_blank()) +
ggtitle("detection of bacterial transcripts")+
scale_fill_manual(values=cols_rpoB_lsr2)
ggsave(paste0(path, "img/MABS_6.png"))#, height=10, width=10)
## Saving 7 x 5 in image
ggplot(data=df_mabs_ratio, aes(x=sample, y=spots_ratio)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("spots ratio") +
theme(panel.grid.minor = element_blank()) +
ggtitle("rpoB/lsr2 number of spots")
ggsave(paste0(path, "img/MABS_5.png"))
## Saving 7 x 5 in image
ggplot(data=df_mabs_ratio, aes(x=sample, y=counts_ratio)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("counts ratio") +
theme(panel.grid.minor = element_blank()) +
ggtitle("rpoB/lsr2 number of spots")
ggsave(paste0(path, "img/MABS_5.png"))
## Saving 7 x 5 in image
ggplot(data=df_mabs_ratio, aes(x=sample, y=spots_ratio_lsr2)) +
geom_bar(stat="identity", position=position_dodge()) +
xlab(NULL)+
ylab("spots ratio lsr2-/lsr2+") +
theme(panel.grid.minor = element_blank()) +
ggtitle("rpoB/lsr2 number of spots")
ggsave(paste0(path, "img/MABS_5.png"))
## Saving 7 x 5 in image
df = data.frame()
for (i in 4:length(slices_list)){
x = slices_list[[i]]
rpoB = sum(x$rpoB_counts)
lsr2 = sum(x$lsr2_counts)
rpoB_p = sum(x$rpoB_counts>0)
lsr2_p = sum(x$lsr2_counts>0)
mabs = rpoB+lsr2
mabs_p = sum(x$MABS_presence == "MABS")
ratio = rpoB/lsr2 #counts
ratio_spots = rpoB_p/lsr2_p
ratio_spots_lsr2 = sum(x$MABS_type == "lsr2-")/sum(x$MABS_type == "lsr2+")
spots = (mabs_p)/dim(x)[2] *100
df = rbind(df, c(rpoB, lsr2, mabs, mabs_p, ratio, ratio_spots, ratio_spots_lsr2, spots))
}
colnames(df) = c("rpoB_umi", "lsr2_umi", "mabs_umi", "mabs_spots", "ratio_counts", "ratio_spots", "ratio_spots_lsr2", "mabs_spots_%")
rownames(df) = samples_infected
df
## rpoB_umi lsr2_umi mabs_umi mabs_spots ratio_counts ratio_spots
## C1 60 37 97 68 1.621622 1.303030
## D1 74 22 96 63 3.363636 2.600000
## C2 184 69 253 114 2.666667 2.069767
## D2 142 69 211 115 2.057971 1.714286
## B3 54 24 78 42 2.250000 1.823529
## C3 160 75 235 100 2.133333 1.697674
## D3 169 72 241 96 2.347222 2.000000
## ratio_spots_lsr2 mabs_spots_%
## C1 1.060606 2.110490
## D1 2.150000 2.178423
## C2 1.651163 3.404001
## D2 1.346939 4.352763
## B3 1.470588 1.236749
## C3 1.325581 2.790957
## D3 1.594595 2.826023
list_deconvolution = readRDS(paste0(path, "data/deconvolution_list.rds"))
for (i in 1:10){
slices_list[[samples_tot[i]]][["RCTD"]] = CreateAssayObject(data = list_deconvolution[[samples_tot[i]]][["RCTD"]]$data[, colnames(slices_list[[samples_tot[i]]])])
}
celltypes_list <- lapply(list_deconvolution, function(df) {
df[["RCTD"]]$data
})
rctd_tot <- do.call(cbind, celltypes_list)
dim(rctd_tot)
## [1] 12 30976
dim(slices_merged)
## [1] 20631 30176
slices_merged[["RCTD"]] = CreateAssayObject(data = rctd_tot[, colnames(slices_merged)])
cell_types = rownames(slices_merged[["RCTD"]]$data)
for (j in 1:length(cell_types)){
slices_merged@meta.data[[cell_types[j]]] = t(slices_merged[["RCTD"]]$data)[colnames(slices_merged), cell_types[j]]
}
for (i in 1:10){
for (j in 1:length(cell_types)){
slices_list[[i]]@meta.data[[cell_types[j]]] = t(slices_list[[i]][["RCTD"]]$data)[colnames(slices_list[[i]]), cell_types[j]]
}
}
saveRDS(slices_list, paste0(path, "data/slices_list.rds"))
#slices_list = readRDS(paste0(path, "data/slices_list.rds"))
saveRDS(slices_merged, paste0(path, "data/slices_merged.rds"))
#slices_merged = readRDS(paste0(path, "data/slices_merged.rds"))